Push the process of waiting for devices to come up right out of DevController,
author: emellor@leeni.uk.xensource.com <emellor@leeni.uk.xensource.com>
Tue, 8 Nov 2005 02:34:23 +0000 (03:34 +0100)
committer: emellor@leeni.uk.xensource.com <emellor@leeni.uk.xensource.com>
Tue, 8 Nov 2005 02:34:23 +0000 (03:34 +0100)
and drive this process from xm create.  Architecturally, this separates the
process of domain creation from the process of waiting for devices, allowing
tools to choose when to perform that wait (if at all).  This places the waiting
in the same category as the unpause after domain creation, architecturally.

The main advantage to this approach is that it takes waiting for devices out
of the scope of XendDomain's domains_lock.  When restarting a domain, the
watch would fire for @releaseDomain while we were waiting for new domains to
come up.  This would deadlock the watch thread, as no more new watches could
be delivered.  Closes bug #387.

In the longer term, we expect to be able to wait for devices to come up
completely, not simply to wait for the hotplug scripts to run, and so it is
necessary then to move the waiting procedure right out of the server, so that
it can be performed after the domain unpause.  Without unpausing the domain,
the frontends will not come up, and so we cannot detect successful device
completion.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
tools/python/xen/xend/XendClient.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/server/DevController.py
tools/python/xen/xend/server/SrvDomain.py
tools/python/xen/xm/create.py

index 9cbef2401b7ed91153ae73ec076507c471f34836..6af92beec74aa21cd97846008916c2e6c7ca7e33 100644 (file)
@@ -220,6 +220,10 @@ class Xend:
     def xend_domain(self, id):
         return self.xendGet(self.domainurl(id))
 
+    def xend_domain_wait_for_devices(self, id):
+        return self.xendPost(self.domainurl(id),
+                             {'op'      : 'wait_for_devices' })
+
     def xend_domain_unpause(self, id):
         return self.xendPost(self.domainurl(id),
                              {'op'      : 'unpause' })
index e03e5a63c583a3917f4e9d254b0e53aac31815dc..adf656b5e3795a76d6227dc410922f71f69f4734 100644 (file)
@@ -897,6 +897,14 @@ class XendDomainInfo:
         return self.getDeviceController(deviceClass).createDevice(devconfig)
 
 
+    def waitForDevices_(self, deviceClass):
+        return self.getDeviceController(deviceClass).waitForDevices()
+
+
+    def waitForDevice(self, deviceClass, devid):
+        return self.getDeviceController(deviceClass).waitForDevice(devid)
+
+
     def reconfigureDevice(self, deviceClass, devid, devconfig):
         return self.getDeviceController(deviceClass).reconfigureDevice(
             devid, devconfig)
@@ -1232,6 +1240,15 @@ class XendDomainInfo:
             self.image.createDeviceModel()
 
 
+    def waitForDevices(self):
+        """Wait for this domain's configured devices to connect.
+
+        @raise: VmError if any device fails to initialise.
+        """
+        for c in controllerClasses:
+            self.waitForDevices_(c)
+
+
     def device_create(self, dev_config):
         """Create a new device.
 
@@ -1239,6 +1256,7 @@ class XendDomainInfo:
         """
         dev_type = sxp.name(dev_config)
         devid = self.createDevice(dev_type, dev_config)
+        self.waitForDevice(dev_type, devid)
 #        self.config.append(['device', dev.getConfig()])
         return self.getDeviceController(dev_type).sxpr(devid)
 
index eed63ebbf93f25007180ae80b78f5a6d83a08c2f..f648562c1afb3bb0565d497e4692e20ae14a0cdb 100644 (file)
@@ -62,6 +62,18 @@ class DevController:
 
         self.writeDetails(config, devid, back, front)
 
+        return devid
+
+
+    def waitForDevices(self):
+        log.debug("Waiting for devices %s.", self.deviceClass)
+        
+        return map(self.waitForDevice, self.deviceIDs())
+
+
+    def waitForDevice(self, devid):
+        log.debug("Waiting for %s.", devid)
+        
         status, fn_ret = self.waitForBackend(devid)
         if status:
             self.destroyDevice(devid)
@@ -74,7 +86,6 @@ class DevController:
             raise VmError( ("Device %s (%s) could not be connected. "
                             "Backend device not found!") 
                             % (devid, self.deviceClass))
-        return devid
 
 
     def reconfigureDevice(self, devid, config):
@@ -122,10 +133,11 @@ class DevController:
         specified device.  This would be suitable for giving to {@link
         #createDevice} in order to recreate that device."""
 
-        backdomid = int(xstransact.Read(self.frontendPath(devid),
-                                        "backend-id"))
-
-        return [self.deviceClass, ['backend', backdomid]]
+        backdomid = xstransact.Read(self.frontendPath(devid), "backend-id")
+        if backdomid is None:
+            raise VmError("Device %s not connected" % devid)
+        
+        return [self.deviceClass, ['backend', int(backdomid)]]
 
 
     def sxprs(self):
@@ -200,7 +212,10 @@ class DevController:
     def readBackend(self, devid, *args):
         frontpath = self.frontendPath(devid)
         backpath = xstransact.Read(frontpath, "backend")
-        return xstransact.Read(backpath, *args)
+        if backpath:
+            return xstransact.Read(backpath, *args)
+        else:
+            raise VmError("Device %s not connected" % devid)
 
 
     def deviceIDs(self):
@@ -242,6 +257,8 @@ class DevController:
         frontpath = self.frontendPath(devid)
         backpath  = self.backendPath(backdom, devid)
         
+        xstransact.Remove(backpath, HOTPLUG_STATUS_NODE)
+
         frontDetails.update({
             'backend' : backpath,
             'backend-id' : "%i" % backdom.getDomid()
@@ -266,7 +283,10 @@ class DevController:
         ev = Event()
 
         def hotplugStatus():
-            status = self.readBackend(devid, HOTPLUG_STATUS_NODE)
+            try:
+                status = self.readBackend(devid, HOTPLUG_STATUS_NODE)
+            except VmError:
+                status = "died"
             if status is not None:
                 watch.xs.unwatch(backpath, watch)
                 hotplugStatus.value = status
@@ -276,14 +296,16 @@ class DevController:
         frontpath = self.frontendPath(devid)
         backpath = xstransact.Read(frontpath, "backend")
 
-        watch = xswatch(backpath, hotplugStatus)
+        if backpath:
+            watch = xswatch(backpath, hotplugStatus)
 
-        ev.wait(DEVICE_CREATE_TIMEOUT)
-        if ev.isSet():
-            return (0, hotplugStatus.value)
+            ev.wait(DEVICE_CREATE_TIMEOUT)
+            if ev.isSet():
+                return (0, hotplugStatus.value)
+            else:
+                return (-1, hotplugStatus.value)
         else:
-            return (-1, hotplugStatus.value)
-
+            return (-1, "missing")
 
 
     def backendPath(self, backdom, devid):
index 8232f28e7ab20c762ce25aefde609eb39b7cbfa6..a3e1584132a8be4ddbb18a8f77f66597c731e1d1 100644 (file)
@@ -13,6 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
+# Copyright (C) 2005 Xensource Ltd
 #============================================================================
 
 from xen.web import http
@@ -62,6 +63,10 @@ class SrvDomain(SrvDir):
         self.acceptCommand(req)
         return self.dom.send_sysrq(int(req.args['key'][0]))
 
+    def op_wait_for_devices(self, _, req):
+        self.acceptCommand(req)
+        return self.dom.waitForDevices()
+
     def op_destroy(self, _, req):
         self.acceptCommand(req)
         return self.xd.domain_destroy(self.dom.domid)
index a290b6347ae21a13725d03aa15326787ab5f8156..aca68da365c3f865d332bd0ab717635879bfa0ce 100644 (file)
@@ -815,6 +815,10 @@ def make_domain(opts, config):
 
     dom = sxp.child_value(dominfo, 'name')
 
+    if server.xend_domain_wait_for_devices(dom) < 0:
+        server.xend_domain_destroy(dom)
+        err("Device creation failed for domain %s" % dom)
+
     if not opts.vals.paused:
         if server.xend_domain_unpause(dom) < 0:
             server.xend_domain_destroy(dom)